########################################
### This program reproduces TABLE 7  ###
### from Duan et al. (2020):         ###
########################################
rm(list = ls())
# install packages (if not installed)
list.of.packages <- c("haven", "dplyr","leaps", "metafor","lmtest","multiwayvcov","sandwich","leaps")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# set working directory to the folder where you downloaded all your files
setwd("P:/My Documents/My Files/UCMETA/UC META-ANALYSIS WORKSHOP (March 2022)")
library(haven) # need this package to load STATA data (.dta) file
library(dplyr) # need this package to organize dataset
library(metafor) # need this package to use meta estimators
library(lmtest) # need this package to correct standatd errors
library(multiwayvcov) # need this package to correct standatd errors
library(sandwich) # need this package to correct standatd errors
library(leaps) # need this package to do model selection

### Loading dataset
DT <- as.data.frame(read_dta('meta-orig.dta'))

###############################
### Characteristics of the data
###############################
DT2 <- DT  %>% rename(id = idstudy,
                      all = spill_other_firm,
                      spill_reg = sameregion,
                      spill_ind = sameindustry,
                      spill_ex = spill_exporter,
                      value = spill_value,
                      employment = spill_employment,
                      output = spill_output,
                      number = spill_number,
                      asset = spill_assets,
                      rdsp = spill_rd,
                      other_ms = spill_other,
                      ogls = olsgls,
                      plt = probit,
                      nsph = nonsphericalerrors,
                      endg = endogeneity,
                      catg = categorical,
                      sps = sampleselection,
                      fe = panel_fe,
                      agg = aggregate,
                      cs = crosssection,
                      labq = labourquality,
                      prod = productivity,
                      pubyear = publicationyear)


DT2$all[DT2$foreign==1] <- 1
DT2 <- DT2 %>% rename(nondomestic = all)
DT2$other_ms[DT2$asset==1] <- 1
DT2$impact[is.na(DT2$impact)] <- 0
DT2$firm_level <- 1*(DT2$agg==0)


###############################
### DEFINITION OF BASIC VARIABLES
###############################
DT2$tstat <- DT2$t
DT2$df <- DT2$samplesize - DT2$k
DT2$pcc <- DT2$tstat/sqrt(DT2$tstat^2+DT2$df)
DT2$varpcc <- (1-DT2$pcc^2)/DT2$df
DT2$sepcc <- sqrt(DT2$varpcc)
DT2$avgyear <- (DT2$end+DT2$start)/2
DT2$tspan <- DT2$end-DT2$start+1


###############################
### Kick out extreme values of PCC
###############################
rng <- as.numeric(quantile(DT2$pcc, probs = c(0.01,0.99), na.rm = TRUE, type=1))
DT2 <- DT2[(rng[1]<DT2$pcc & DT2$pcc<rng[2]),]


###############################
### Calculating study weights
###############################
NumEst <- as.data.frame(DT2 %>%
                          group_by(id) %>%
                          summarise(
                            numberests = length(pcc)
                          ))
DT2 <- right_join(DT2, NumEst, by=c('id'))


###############################
### Calculating std error for RE estimator
###############################
# Note: this next step takes a lont time to run
REML <- rma(pcc~1, sei=sepcc, data=DT2, method='REML')
summary(REML)
DT2$seRE <- sqrt(DT2$sepcc^2+REML$tau2)


###############################
### Calculating the four weights
###############################
DT2$FEweight1 <- 1/(DT2$sepcc^2)
DT2$REweight1 <- 1/(DT2$seRE^2)
DT2$FEweight2 <- 1/(DT2$numberests*DT2$sepcc^2)
DT2$REweight2 <- 1/(DT2$numberests*DT2$seRE^2)


###############################
###############################
### FIXED EFFECTS           ###
###############################
###############################
### THIS SECTION GIVES EQUAL WEIGHT TO EACH OBSERVATION

###############################
### TABLE 7: FixedEffects(Weight1) // All Control Variables
###############################
model <- 'pcc~sepcc+spill_ex+spill_reg+spill_ind+spill_fdi+number+value+employment+manufacturing+avgyear+firm_level+domestic+oecd+china+binary+plt+endg+sps+fe+size+prod+labq+capital+rd+published+impact+citation'
FEWLS1 <- lm(model, weights=FEweight1, data=DT2)
FEWLS1_clustered <- coeftest(FEWLS1, cluster.vcov(FEWLS1, DT2$id)) 
print(FEWLS1_clustered)


###############################
### TABLE 7: FixedEffects(Weight1) // Stepwise Regression
###############################
## Here we use backwards stepwise regression selecting on the best regression using the BIC criterion
## We lock in the 4 spillover variables and the se variable: Spill_ind Spill_reg Spill_FDI Spill_ex 
## Making dataset for Stepwise regression
regDT <- DT2[,c('pcc','number','value','employment',
                'manufacturing','avgyear','firm_level',
                'domestic','oecd','china','binary','plt',
                'endg','sps','fe','size','prod','labq',
                'capital','rd','published','impact',
                'citation','sepcc','spill_ex','spill_reg',
                'spill_ind','spill_fdi')]

## Backward estimation with weight=FEweight1
models <- regsubsets(pcc ~ ., data=regDT, method = "backward",
                     force.in=c('sepcc','spill_ex','spill_reg','spill_ind','spill_fdi'),
                     intercept=T, weights=DT2$FEweight1, nvmax = (ncol(regDT)-1))
## Choose a model based on bic
estBeta <- coef(models, which.min(summary(models)$bic))
estseBeta <- sqrt(diag(vcov(models,which.min(summary(models)$bic))))
esttBeta <- estBeta/estseBeta
print(cbind.data.frame(EstCoeff=estBeta, Std.Err=estseBeta, t.Stat=esttBeta))

### NOTE: You first have to see which variables the stepwise procedure above selects
### before specifying the regression below

varList <- rownames(as.data.frame(estBeta))
varList <- varList[-1]
model <- paste0('pcc~',paste(varList, collapse = '+'))

FEWLS1_select <- lm(model, weights=FEweight1, data=DT2)
FEWLS1_select_clustered <- coeftest(FEWLS1_select, cluster.vcov(FEWLS1_select, DT2$id)) 
print(FEWLS1_select_clustered)


### THIS SECTION GIVES EQUAL WEIGHT TO EACH STUDY
###############################
### TABLE 7: FixedEffects(Weight2) // All Control Variables
###############################
model <- 'pcc~sepcc+spill_ex+spill_reg+spill_ind+spill_fdi+number+value+employment+manufacturing+avgyear+firm_level+domestic+oecd+china+binary+plt+endg+sps+fe+size+prod+labq+capital+rd+published+impact+citation'
FEWLS2 <- lm(model, weights=FEweight2, data=DT2)
FEWLS2_clustered <- coeftest(FEWLS2, cluster.vcov(FEWLS2, DT2$id)) 
print(FEWLS2_clustered)


###############################
### TABLE 7: FixedEffects(Weight2) // Stepwise Regression
###############################
## Backward estimation
models <- regsubsets(pcc ~ ., data=regDT, method = "backward",
                     force.in=c('sepcc','spill_ex','spill_reg','spill_ind','spill_fdi'),
                     intercept=T, weights=DT2$FEweight2, nvmax = (ncol(regDT)-1))
## Choose a model based on bic
estBeta <- coef(models, which.min(summary(models)$bic))
estseBeta <- sqrt(diag(vcov(models,which.min(summary(models)$bic))))
esttBeta <- estBeta/estseBeta
print(cbind.data.frame(EstCoeff=estBeta, Std.Err=estseBeta, t.Stat=esttBeta))

### NOTE: You first have to see which variables the stepwise procedure above selects
### before specifying the regression below
varList <- rownames(as.data.frame(estBeta))
varList <- varList[-1]
model <- paste0('pcc~',paste(varList, collapse = '+'))

FEWLS2_select <- lm(model, weights=FEweight1, data=DT2)
FEWLS2_select_clustered <- coeftest(FEWLS2_select, cluster.vcov(FEWLS2_select, DT2$id)) 
print(FEWLS2_select_clustered)



###############################
###############################
### RANDOM EFFECTS          ###
###############################
###############################

###############################
### TABLE 7: RandomEffects(Weight1) // All Control Variables
###############################
model <- 'pcc~sepcc+spill_ex+spill_reg+spill_ind+spill_fdi+number+value+employment+manufacturing+avgyear+firm_level+domestic+oecd+china+binary+plt+endg+sps+fe+size+prod+labq+capital+rd+published+impact+citation'
REWLS1 <- lm(model, weights=REweight1, data=DT2)
REWLS1_clustered <- coeftest(REWLS1, cluster.vcov(REWLS1, DT2$id)) 
print(REWLS1_clustered)


###############################
### TABLE 7: RandomEffects(Weight1) // Stepwise Regression
###############################
## Here we use backwards stepwise regression selecting on the best regression using the BIC criterion
## We lock in the 4 spillover variables and the se variable: Spill_ind Spill_reg Spill_FDI Spill_ex 
## Backward estimation
models <- regsubsets(pcc ~ ., data=regDT, method = "backward",
                     force.in=c('sepcc','spill_ex','spill_reg','spill_ind','spill_fdi'),
                     intercept=T, weights=DT2$REweight1, nvmax = (ncol(regDT)-1))
## Choose a model based on bic
estBeta <- coef(models, which.min(summary(models)$bic))
estseBeta <- sqrt(diag(vcov(models,which.min(summary(models)$bic))))
esttBeta <- estBeta/estseBeta
print(cbind.data.frame(EstCoeff=estBeta, Std.Err=estseBeta, t.Stat=esttBeta))

### NOTE: You first have to see which variables the stepwise procedure above selects
### before specifying the regression below
varList <- rownames(as.data.frame(estBeta))
varList <- varList[-1]
model <- paste0('pcc~',paste(varList, collapse = '+'))

REWLS1_select <- lm(model, weights=REweight1, data=DT2)
REWLS1_select_clustered <- coeftest(REWLS1_select, cluster.vcov(REWLS1_select, DT2$id)) 
print(REWLS1_select_clustered)


###############################
### TABLE 7: RandomEffects(Weight2) // All Control Variables
###############################
model <- 'pcc~sepcc+spill_ex+spill_reg+spill_ind+spill_fdi+number+value+employment+manufacturing+avgyear+firm_level+domestic+oecd+china+binary+plt+endg+sps+fe+size+prod+labq+capital+rd+published+impact+citation'
REWLS2 <- lm(model, weights=REweight2, data=DT2)
REWLS2_clustered <- coeftest(REWLS2, cluster.vcov(REWLS2, DT2$id)) 
print(REWLS2_clustered)


###############################
### TABLE 7: RandomEffects(Weight2) // Stepwise Regression
###############################
## Here we use backwards stepwise regression selecting on the best regression using the BIC criterion
## We lock in the 4 spillover variables and the cse variable: Spill_ind Spill_reg Spill_FDI Spill_ex 
## Backward estimation
models <- regsubsets(pcc ~ ., data=regDT, method = "backward",
                     force.in=c('sepcc','spill_ex','spill_reg','spill_ind','spill_fdi'),
                     intercept=T, weights=DT2$REweight2, nvmax = (ncol(regDT)-1))
## Choose a model based on bic
estBeta <- coef(models, which.min(summary(models)$bic))
estseBeta <- sqrt(diag(vcov(models,which.min(summary(models)$bic))))
esttBeta <- estBeta/estseBeta
print(cbind.data.frame(EstCoeff=estBeta, Std.Err=estseBeta, t.Stat=esttBeta))

### NOTE: You first have to see which variables the stepwise procedure above selects
### before specifying the regression below
varList <- rownames(as.data.frame(estBeta))
varList <- varList[-1]
model <- paste0('pcc~',paste(varList, collapse = '+'))

REWLS2_select <- lm(model, weights=REweight2, data=DT2)
REWLS2_select_clustered <- coeftest(REWLS2_select, cluster.vcov(REWLS2_select, DT2$id)) 
print(REWLS2_select_clustered)


## End of Part II Code